
* ---------------------------------------------------
* Event History Analysis
* Josef Brüderl, March 2011
* Entry into Motherhood (data from ALLBUS 2000)
* ---------------------------------------------------

* duration: age at birth of first child - 14 (measured in years only)
* child: =1 if child, =0 if censored at time of interview
* educ: years of education
* east: =1 if born in East Germany, =0 if born in West Germany
* coh: birth cohort dummies


* Interpret all following commands under Stata version 11 rules.
version 11
* NOTE(review): machine-specific absolute path. The script later loads
* Motherhood.dta by relative name, so this directory must contain that file;
* adjust the path before running on another machine.
cd "K:\Vorlesung EHA\Stata Beispiele\"    //working directory


***********************************************
* -----------------------------------
*    EHA with time-varying covariate
* -----------------------------------
***********************************************

* Load the person-level data and declare it as survival-time data.
use Motherhood.dta, clear
stset duration, id(persnr) failure(child==1)  //id() needed for splitting

* "Person-period" episode splitting: every record is cut at each full year.
* There is no need to stset the data anew; stsplit keeps the response
* triple (_t0, _t, _d) consistent. T0 holds the start time of each episode.
stsplit T0, every(1)

* ineduc: time-varying covariate, =1 in the years a woman is still in school.
* The process time starts at age 14 (duration = age - 14); the "+6" is
* presumably the school entry age -- confirm against the data documentation.
* Stata treats a missing value as larger than any number, so without the
* if-qualifier a missing educ would silently produce ineduc==1. With the
* qualifier ineduc stays missing for those records instead.
gen ineduc = T0 <= (educ+6-14) if !missing(educ)
list persnr _t0 _t _d educ ineduc if persnr<=2, sepby(persnr)

* Cox model with time-varying covariate
stcox educ ineduc east coh2 coh3 coh4 coh5

* And the log-logistic
streg educ ineduc east coh2 coh3 coh4 coh5, dist(loglogistic)

* Conditional effect plot (stcurve does not work with TVCs).
* Workaround: inside preserve/restore, fix the other covariates at chosen
* values (West German, cohort 2), predict the hazard from the log-logistic
* model fitted last, and plot it for two selected education levels.
* educ and ineduc keep their observed values.
preserve
replace east=0
replace coh2=1
replace coh3=0
replace coh4=0
replace coh5=0
predict pc1, hazard
twoway (line pc1 _t if educ==10.5 & _t<31, sort lcolor(blue) lwidth(thick))  ///
       (line pc1 _t if educ==18   & _t<31, sort lcolor(red) lwidth(thick)),  ///
   ytitle("fertility rate", size(large))                                     ///
   ylabel(0(.02)0.1, format(%3.2f) labsize(medlarge) angle(horizontal) grid) ///
   xtitle("age - 14", size(large)  margin(0 0 0 2))                          ///
   xlabel(0(5)30, labsize(medlarge))                                         ///
   legend(order(1 "10.5 years" 2 "18 years") rows(2) size(medlarge)          ///
          position(11) ring(0))
restore

		  
****************************************************************
* Demonstration that epi-splitting does not affect results
****************************************************************

* Start again from the original data (one record per woman).
use Motherhood.dta, clear
stset duration, failure(child==1) id(persnr)

* Log-logistic model on the unsplit data.
streg educ east coh2 coh3 coh4 coh5, distribution(loglogistic)
estimates store LogLog_before

* Split every record at each full year ("person-period" format), then
* fit the identical model on the split data.
stsplit T0, every(1)
streg educ east coh2 coh3 coh4 coh5, distribution(loglogistic)
estimates store LogLog_after

* Both fits side by side: exponentiated coefficients with standard errors,
* plus record, subject and failure counts, time at risk and log likelihood.
estimates table LogLog_before LogLog_after, eform se stats(N N_sub N_fail risk ll)



*******************************************************************************
* Full Flexibility in Modelling Duration Dependence after Episode Splitting
*******************************************************************************

use Motherhood.dta, clear

* tab duration child
* Cap the duration at 39 so that t(max) is defined
* (only censored observations are out there).
recode duration 40/max=39

stset duration, id(persnr) failure(child==1)  //id() needed for splitting
stsplit T0, every(1)                          //"person-period" episode splitting

* Functions of process time. After splitting, _t is the end time of each
* one-year episode, so each of them is constant within an episode.
generate t    = _t           //time variable is _t
generate t2   = t^2          //quadratic time
generate lnt  = ln(t)        //logarithm of time
generate lntm = ln(40-t)     //logarithm of inverse time
* Period dummies for the piecewise-constant model (reference: _t<=5).
gen T20 = (_t>=6  & _t<=10)  //early twenties
gen T25 = (_t>=11 & _t<=15)  //late twenties
gen T30 = (_t>=16 & _t<=20)  //early thirties
gen T35 = (_t>=21 & _t<=25)  //late thirties
gen T40 = (_t>=26)           //above forty
tab t, gen(y)                //year dummies, one per distinct value of t

* Each parametric shape is reproduced by an exponential model with suitable
* time terms; the "(orig)" runs fit the built-in distribution for comparison.
* A blank is required before every end-of-line "//": without it Stata does
* not recognise the comment and parses the text as part of the option list.
streg educ east coh2 coh3 coh4 coh5,          nohr dist(gomp) //Gompertz(orig)
streg educ east coh2 coh3 coh4 coh5 t,        nohr dist(exp)  //Gompertz
est store Gompertz
streg educ east coh2 coh3 coh4 coh5,          nohr dist(weib) //Weibull(orig)
streg educ east coh2 coh3 coh4 coh5 lnt,      nohr dist(exp)  //Weibull
est store Weibull
streg educ east coh2 coh3 coh4 coh5 t t2,     nohr dist(exp)  //Quadratic model
est store Quadratic
streg educ east coh2 coh3 coh4 coh5 t lnt,    nohr dist(exp)  //"LinearLogistic"
est store LinLog
streg educ east coh2 coh3 coh4 coh5 lnt lntm, nohr dist(exp)  //"Blossfelds favorite"
est store Blossi
* tab _t _d                             //after time 24 (age 38) events become sparse
* Only the first 24 year dummies enter; all later years form the reference.
streg educ east coh2 coh3 coh4 coh5 y1-y24,   nohr dist(exp)  //"Cox"
est store Cox
streg educ east coh2 coh3 coh4 coh5 T20-T40,  nohr dist(exp)  //piecewise-constant model
est store PC
streg educ east coh2 coh3 coh4 coh5, dist(loglogistic)        //compare with loglogistic
est store LogLog

* Information criteria (AIC/BIC) of all stored models in one table.
estimates stats Gompertz Weibull Quadratic LinLog Blossi Cox PC LogLog


* Rate functions of the two winning models ("Blossi" and "LinLog"),
* evaluated for one covariate profile. The data are changed only between
* preserve and restore.
preserve

* Profile: Abitur (13 years of education), West German, birth cohort 2.
replace educ = 13
replace east = 0
replace coh2 = 1
foreach cohort of varlist coh3 coh4 coh5 {
    replace `cohort' = 0
}

* Predicted hazard from each stored model.
estimates restore Blossi
predict r1, hazard
estimates restore LinLog
predict r2, hazard

* Step-function plot of both rates over the first 30 years of process time.
twoway line r1 _t if _t<31, sort connect(J) lwidth(thick) lcolor(blue)         ///
    || line r2 _t if _t<31, sort connect(J) lwidth(thick) lcolor(red)          ///
    ||, title("The Winners' rate functions", size(large))                      ///
        xtitle("age - 14", size(large) margin(medsmall))                       ///
        xlabel(0(5)30, labsize(medlarge))                                      ///
        ytitle("fertility rate", size(large))                                  ///
        ylabel(0(.025)0.10, labsize(medlarge) angle(horizontal) grid)          ///
        legend(order(1 "Blossi" 2 "LinLog") rows(2) size(medlarge) position(1) ring(0))

restore
